{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<img src='http://hilpisch.com/taim_logo.png' width=\"350px\" align=\"right\">"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Artificial Intelligence in Finance"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Reinforcement Learning"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "&copy; Dr Yves J Hilpisch | The Python Quants GmbH\n",
    "\n",
    "http://aimachine.io | http://twitter.com/dyjh"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import math\n",
    "import random\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from pylab import plt, mpl\n",
    "plt.style.use('seaborn')\n",
    "mpl.rcParams['savefig.dpi'] = 300\n",
    "mpl.rcParams['font.family'] = 'serif'\n",
    "np.set_printoptions(precision=4, suppress=True)\n",
    "os.environ['PYTHONHASHSEED'] = '0'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "import tensorflow as tf\n",
    "tf.get_logger().setLevel(logging.ERROR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tensorflow.python.framework.ops import disable_eager_execution\n",
    "disable_eager_execution()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from keras.layers import Dense, Dropout\n",
    "from keras.models import Sequential\n",
    "from keras.optimizers import Adam, RMSprop\n",
    "from sklearn.metrics import accuracy_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def set_seeds(seed=100):\n",
    "    random.seed(seed)\n",
    "    np.random.seed(seed)\n",
    "    tf.random.set_seed(seed)\n",
    "    env.seed(seed)\n",
    "    env.action_space.seed(100)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Improved Finance Environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class observation_space:\n",
    "    def __init__(self, n):\n",
    "        self.shape = (n,)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class action_space:\n",
    "    def __init__(self, n):\n",
    "        self.n = n\n",
    "    def seed(self, seed):\n",
    "        pass\n",
    "    def sample(self):\n",
    "        return random.randint(0, self.n - 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Finance:\n",
    "    url = 'http://hilpisch.com/aiif_eikon_eod_data.csv'\n",
    "    def __init__(self, symbol, features, window, lags,\n",
    "                 leverage=1, min_performance=0.85,\n",
    "                 start=0, end=None, mu=None, std=None):\n",
    "        self.symbol = symbol\n",
    "        self.features = features\n",
    "        self.n_features = len(features)\n",
    "        self.window = window\n",
    "        self.lags = lags\n",
    "        self.leverage = leverage\n",
    "        self.min_performance = min_performance\n",
    "        self.start = start\n",
    "        self.end = end\n",
    "        self.mu = mu\n",
    "        self.std = std\n",
    "        self.observation_space = observation_space(self.lags)\n",
    "        self.action_space = action_space(2)\n",
    "        self._get_data()\n",
    "        self._prepare_data()\n",
    "    def _get_data(self):\n",
    "        self.raw = pd.read_csv(self.url, index_col=0,\n",
    "                               parse_dates=True).dropna()\n",
    "    def _prepare_data(self):\n",
    "        self.data = pd.DataFrame(self.raw[self.symbol])\n",
    "        self.data = self.data.iloc[self.start:]\n",
    "        self.data['r'] = np.log(self.data / self.data.shift(1))\n",
    "        self.data.dropna(inplace=True)\n",
    "        self.data['s'] = self.data[self.symbol].rolling(\n",
    "                                              self.window).mean() \n",
    "        self.data['m'] = self.data['r'].rolling(self.window).mean()\n",
    "        self.data['v'] = self.data['r'].rolling(self.window).std()\n",
    "        self.data.dropna(inplace=True)\n",
    "        if self.mu is None:\n",
    "            self.mu = self.data.mean()\n",
    "            self.std = self.data.std()\n",
    "        self.data_ = (self.data - self.mu) / self.std\n",
    "        self.data_['d'] = np.where(self.data['r'] > 0, 1, 0)\n",
    "        self.data_['d'] = self.data_['d'].astype(int)\n",
    "        if self.end is not None:\n",
    "            self.data = self.data.iloc[:self.end - self.start]\n",
    "            self.data_ = self.data_.iloc[:self.end - self.start]\n",
    "    def _get_state(self):\n",
    "        return self.data_[self.features].iloc[self.bar -\n",
    "                                self.lags:self.bar]\n",
    "    def seed(self, seed):\n",
    "        random.seed(seed)\n",
    "        np.random.seed(seed)\n",
    "    def reset(self):\n",
    "        self.treward = 0\n",
    "        self.accuracy = 0\n",
    "        self.performance = 1\n",
    "        self.bar = self.lags\n",
    "        state = self.data_[self.features].iloc[self.bar-\n",
    "                        self.lags:self.bar]\n",
    "        return state.values\n",
    "    def step(self, action):\n",
    "        correct = action == self.data_['d'].iloc[self.bar]\n",
    "        ret = self.data['r'].iloc[self.bar] * self.leverage\n",
    "        reward_1 = 1 if correct else 0\n",
    "        reward_2 = abs(ret) if correct else -abs(ret)\n",
    "        factor = 1 if correct else -1\n",
    "        self.treward += reward_1\n",
    "        self.bar += 1\n",
    "        self.accuracy = self.treward / (self.bar - self.lags)\n",
    "        self.performance *= math.exp(reward_2)\n",
    "        if self.bar >= len(self.data):\n",
    "            done = True\n",
    "        elif reward_1 == 1:\n",
    "            done = False\n",
    "        elif (self.performance < self.min_performance and\n",
    "              self.bar > self.lags + 5):\n",
    "            done = True\n",
    "        else:\n",
    "            done = False\n",
    "        state = self._get_state()\n",
    "        info = {}\n",
    "        return state.values, reward_1 + reward_2 * 5, done, info"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "env = Finance('EUR=', ['EUR=', 'r'], 10, 5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = env.action_space.sample()\n",
    "a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "env.reset()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "env.step(a)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Improved Financial QL Agent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import deque"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class FQLAgent:\n",
    "    def __init__(self, hidden_units, learning_rate, learn_env, valid_env):\n",
    "        self.learn_env = learn_env\n",
    "        self.valid_env = valid_env\n",
    "        self.epsilon = 1.0\n",
    "        self.epsilon_min = 0.1\n",
    "        self.epsilon_decay = 0.98\n",
    "        self.learning_rate = learning_rate\n",
    "        self.gamma = 0.95\n",
    "        self.batch_size = 128\n",
    "        self.max_treward = 0\n",
    "        self.trewards = list()\n",
    "        self.averages = list()\n",
    "        self.performances = list()\n",
    "        self.aperformances = list()\n",
    "        self.vperformances = list()\n",
    "        self.memory = deque(maxlen=2000)\n",
    "        self.model = self._build_model(hidden_units, learning_rate)\n",
    "        \n",
    "    def _build_model(self, hu, lr):\n",
    "        model = Sequential()\n",
    "        model.add(Dense(hu, input_shape=(\n",
    "            self.learn_env.lags, self.learn_env.n_features),\n",
    "                        activation='relu'))\n",
    "        model.add(Dropout(0.3, seed=100))\n",
    "        model.add(Dense(hu, activation='relu'))\n",
    "        model.add(Dropout(0.3, seed=100))\n",
    "        model.add(Dense(2, activation='linear'))\n",
    "        model.compile(\n",
    "            loss='mse',\n",
    "            optimizer=RMSprop(lr=lr)\n",
    "        )\n",
    "        return model\n",
    "        \n",
    "    def act(self, state):\n",
    "        if random.random() <= self.epsilon:\n",
    "            return self.learn_env.action_space.sample()\n",
    "        action = self.model.predict(state)[0, 0]\n",
    "        return np.argmax(action)\n",
    "    \n",
    "    def replay(self):\n",
    "        batch = random.sample(self.memory, self.batch_size)\n",
    "        for state, action, reward, next_state, done in batch:\n",
    "            if not done:\n",
    "                reward += self.gamma * np.amax(\n",
    "                    self.model.predict(next_state)[0, 0])\n",
    "            target = self.model.predict(state)\n",
    "            target[0, 0, action] = reward\n",
    "            self.model.fit(state, target, epochs=1,\n",
    "                           verbose=False)\n",
    "        if self.epsilon > self.epsilon_min:\n",
    "            self.epsilon *= self.epsilon_decay\n",
    "    \n",
    "    def learn(self, episodes):\n",
    "        for e in range(1, episodes + 1):\n",
    "            state = self.learn_env.reset()\n",
    "            state = np.reshape(state, [1, self.learn_env.lags,\n",
    "                                       self.learn_env.n_features])\n",
    "            for _ in range(10000):\n",
    "                action = self.act(state)\n",
    "                next_state, reward, done, info = \\\n",
    "                                self.learn_env.step(action)\n",
    "                next_state = np.reshape(next_state,\n",
    "                                [1, self.learn_env.lags,\n",
    "                                 self.learn_env.n_features])\n",
    "                self.memory.append([state, action, reward,\n",
    "                                     next_state, done])\n",
    "                state = next_state\n",
    "                if done:\n",
    "                    treward = _ + 1\n",
    "                    self.trewards.append(treward)\n",
    "                    av = sum(self.trewards[-25:]) / 25\n",
    "                    perf = self.learn_env.performance\n",
    "                    self.averages.append(av)\n",
    "                    self.performances.append(perf)\n",
    "                    self.aperformances.append(\n",
    "                        sum(self.performances[-25:]) / 25)\n",
    "                    self.max_treward = max(self.max_treward, treward)\n",
    "                    templ = 'episode: {:2d}/{} | treward: {:4d} | '\n",
    "                    templ += 'perf: {:5.3f} | av: {:5.1f} | max: {:4d}'\n",
    "                    print(templ.format(e, episodes, treward, perf,\n",
    "                                  av, self.max_treward), end='\\r')\n",
    "                    break\n",
    "            self.validate(e, episodes)\n",
    "            if len(self.memory) > self.batch_size:\n",
    "                self.replay()\n",
    "    def validate(self, e, episodes):\n",
    "        state = self.valid_env.reset()\n",
    "        state = np.reshape(state, [1, self.valid_env.lags,\n",
    "                                   self.valid_env.n_features])\n",
    "        for _ in range(10000):\n",
    "            action = np.argmax(self.model.predict(state)[0, 0])\n",
    "            next_state, reward, done, info = self.valid_env.step(action)\n",
    "            state = np.reshape(next_state, [1, self.valid_env.lags,\n",
    "                                   self.valid_env.n_features])\n",
    "            if done:\n",
    "                treward = _ + 1\n",
    "                perf = self.valid_env.performance\n",
    "                self.vperformances.append(perf)\n",
    "                if e % 20 == 0:\n",
    "                    templ = 71 * '='\n",
    "                    templ += '\\nepisode: {:2d}/{} | VALIDATION | '\n",
    "                    templ += 'treward: {:4d} | perf: {:5.3f} | '\n",
    "                    templ += 'eps: {:.2f}\\n'\n",
    "                    templ += 71 * '='\n",
    "                    print(templ.format(e, episodes, treward,\n",
    "                                       perf, self.epsilon))\n",
    "                break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "symbol = 'EUR='\n",
    "features = [symbol, 'r', 's', 'm', 'v']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = 0\n",
    "b = 2000\n",
    "c = 500"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "learn_env = Finance(symbol, features, window=10, lags=6,\n",
    "                 leverage=1, min_performance=0.85,\n",
    "                 start=a, end=a + b, mu=None, std=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "learn_env.data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "valid_env = Finance(symbol, features, window=learn_env.window,\n",
    "                 lags=learn_env.lags, leverage=learn_env.leverage,\n",
    "                 min_performance=learn_env.min_performance,\n",
    "                 start=a + b, end=a + b + c,\n",
    "                 mu=learn_env.mu, std=learn_env.std)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "valid_env.data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "set_seeds(100)\n",
    "agent = FQLAgent(24, 0.0001, learn_env, valid_env)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "episodes = 61"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%time agent.learn(episodes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "agent.epsilon"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(10, 6))\n",
    "x = range(1, len(agent.averages) + 1)\n",
    "y = np.polyval(np.polyfit(x, agent.averages, deg=3), x)\n",
    "plt.plot(agent.averages, label='moving average')\n",
    "plt.plot(x, y, 'r--', label='regression')\n",
    "plt.xlabel('episodes')\n",
    "plt.ylabel('total reward')\n",
    "plt.legend();"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(10, 6))\n",
    "x = range(1, len(agent.performances) + 1)\n",
    "y = np.polyval(np.polyfit(x, agent.performances, deg=3), x)\n",
    "y_ = np.polyval(np.polyfit(x, agent.vperformances, deg=3), x)\n",
    "plt.plot(agent.performances[:], label='training')\n",
    "plt.plot(agent.vperformances[:], label='validation')\n",
    "plt.plot(x, y, 'r--', label='regression (train)')\n",
    "plt.plot(x, y_, 'r-.', label='regression (valid)')\n",
    "plt.xlabel('episodes')\n",
    "plt.ylabel('gross performance')\n",
    "plt.legend();"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<img src=\"http://hilpisch.com/tpq_logo.png\" alt=\"The Python Quants\" width=\"35%\" align=\"right\" border=\"0\"><br>\n",
    "\n",
    "<a href=\"http://tpq.io\" target=\"_blank\">http://tpq.io</a> | <a href=\"http://twitter.com/dyjh\" target=\"_blank\">@dyjh</a> | <a href=\"mailto:training@tpq.io\">training@tpq.io</a>"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
